In [11]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import csv
import networkx as nx
import pylab as plt
from collections import Counter
import community
In [192]:
# get meme_names
# name of the meme/hashtag under analysis; downstream cells derive all file paths from it
meme_name="thevoice"
# NOTE(review): hardcoded absolute local path -- parameterize (env var / config cell) for portability
results_path="/home/clemsos/Dev/mitras/results/"
# per-meme output directory (the double slash is harmless on POSIX)
meme_path=results_path+"/"+meme_name
In [193]:
meme_graph_csv=meme_path+"/"+meme_name+"_edges.csv"
# meme_graph_csv=meme_path+"/"+meme_name+"_d3graph.csv"
with open(meme_graph_csv, 'rb') as edgefile:
edgefile.next() # skip headers
edgecsv=csv.reader(edgefile)
# edges=[ str(str(edge[0])+","+str(edge[1])+","+str(edge[2])) for edge in edgecsv]
edges=[(e[0], e[1]) for e in edgecsv]
edges_weighted=[str(p[0][0]+" "+p[0][1]+" "+str(p[1])) for p in Counter(edges).most_common()] # if p[1] > 1]
print "Edges (raw files) : %d edges"%len(edges)
print "Weighted edges %d"%len(edges_weighted)
G = nx.read_weighted_edgelist(edges_weighted, nodetype=str, delimiter=" ",create_using=nx.DiGraph())
# G=nx.read_weighted_edgelist(edgefile, create_using=nx.DiGraph(), delimiter=",")
In [195]:
G_components = nx.connected_component_subgraphs(G.to_undirected())
print "Number of components: %d"%nx.number_connected_components(G.to_undirected())
# print "Number of strongly connected components: %d"%nx.number_strongly_connected_components(G)
main_components=[g for g in G_components[0:5]]
for i,g in enumerate(main_components):
print "cluster %i : %.3f %% "%(i,(float(g.number_of_nodes())/G.number_of_nodes())*100)
In [196]:
G_mc = G_components[0]
G_mc_per_total= (float(G_mc.number_of_nodes())/G.number_of_nodes())*100
print "Biggest connected component is %.2f %% of the graph" % G_mc_per_total
nx.write_graphml(G_mc, meme_path+"/"+meme_name+"_main_component.graphml")
# use the most connected component as the main one
# G = G_components[0]
Measures how well a network decomposes into modular communities.
A high modularity score indicates sophisticated internal structure. This structure, often called a community structure, describes how the network is compartmentalized into sub-networks. These sub-networks (or communities) have been shown to have significant real-world meaning.
In [168]:
# create dendogram
dendo = community.generate_dendogram(G.to_undirected())
for level in range(len(dendo) - 1) :
print "partition at level", level #, "is", community.partition_at_level(dendo, level)
In [172]:
# from http://perso.crans.org/aynaud/communities/
# Best partition
best_partition = community.best_partition(G.to_undirected())
modularity=community.modularity(best_partition,G.to_undirected())
print "Modularity of the best partition: %f"%modularity
print "Number of nodes in the best partition : ", len(set(best_partition.values()))
In [173]:
# Collapse each Louvain community into a single node (induced community graph)
# and persist it for external visualization.
G_ok = community.induced_graph(best_partition, G.to_undirected())
nx.write_graphml(G_ok, meme_path + "/" + meme_name + "_best_partition.graphml")
Betweenness centrality is a measure of a node's centrality in a network. It is equal to the number of shortest paths from all vertices to all others that pass through that node. Betweenness centrality is a more useful measure (than just connectivity) of both the load and importance of a node. The former is more global to the network, whereas the latter is only a local effect.
In [186]:
# Create ordered tuple of centrality data
cent_dict=nx.betweenness_centrality (G.to_undirected())
cent_items=[(b,a) for (a,b) in cent_dict.iteritems()]
# Sort in descending order
cent_items.sort()
cent_items.reverse()
# Highest centrality
for c in cent_items[0:5]:
print "Highest betweeness centrality :%f"%c[0]
In [194]:
%pylab inline
in_degrees = G.in_degree() # dictionary node:degree
in_count=[v for v in Counter(in_degrees.values()).most_common() if v[0] > 2]
in_values=[v[0] for v in in_count]
in_hist=[v[1] for v in in_count]
out_degrees = G.out_degree() # dictionary node:degree
out_count=[v for v in Counter(out_degrees.values()).most_common() if v[0] > 2]
out_values=[v[0] for v in out_count]
out_hist=[v[1] for v in out_count]
in_degree_sequence=sorted(G.in_degree().values(),reverse=True) # degree sequence
out_degree_sequence=sorted(G.out_degree().values(),reverse=True) # degree sequence
dmax=max(degree_sequence)
plt.loglog(in_degree_sequence,'b-',marker='o')
plt.loglog(out_degree_sequence,'r-',marker='v')
# plt.figure()
# plt.plot(in_values,in_hist,'rx') # in-degree
# plt.plot(out_values,out_hist,'bv') # out-degree
plt.legend(['In-degree','Out-degree'])
plt.xlabel('Rank')
plt.ylabel('Degree')
plt.title(meme_name+' degree distribution')
for d in Counter(in_degrees.values()).most_common(5):
print "Highest in-degree value : %d"% d[1]
for d in Counter(out_degrees.values()).most_common(5):
print "Highest out-degree value : %d"% d[1]
In [29]:
# draw partition graph
# Draw nodes colored by community (one gray level per community).
# Fixed: this cell referenced `partition`, which is only defined in a *later* cell
# (In [149]) -- it failed on a fresh Restart-&-Run-All. Use `best_partition`,
# computed earlier in the notebook, instead.
G_und = G.to_undirected()
size = float(len(set(best_partition.values())))
pos = nx.spring_layout(G_und)
count = 0.
for com in set(best_partition.values()) :
    count = count + 1.
    list_nodes = [nodes for nodes in best_partition.keys() if best_partition[nodes] == com]
    nx.draw_networkx_nodes(G_und, pos, list_nodes, node_size = 20, node_color = str(count / size))
# nx.draw_networkx_edges(G_und, pos, alpha=0.5)
Out[29]:
A clustering coefficient is a measure of the degree to which nodes in a graph tend to cluster together. Evidence suggests that in most real-world networks, and in particular social networks, nodes tend to create tightly knit groups characterised by a relatively high density of ties.
The global clustering coefficient is based on triplets of nodes.
Network average clustering coefficient
A graph is considered small-world when its average shortest path length is small (comparable to that of an equivalent random graph) while its average clustering coefficient is much higher than that of the random graph.
In [65]:
N,K = G.order(), G.size()
print "Nodes: ", N
print "Edges: ", K
avg_deg = float(K)/N
print "Average degree: ", avg_deg
# Clustering coefficient of all nodes (in a dictionary)
ccs = nx.clustering(G.to_undirected())
# Average clustering coefficient
avg_clust_coef = sum(ccs.values()) / len(ccs)
# also : nx.algorithms.cluster.average_clustering(G.to_undirected())
print "Average clustering coeficient: %f"%avg_clust_coef
# random graph
# G_random=nx.erdos_renyi_graph(len(G.nodes()), 0.5)
# nx.algorithms.cluster.average_clustering(G_random)
In [1]:
cliques=[c for c in nx.find_cliques(G.to_undirected())]
cliques_length=[len(c) for c in nx.find_cliques(G.to_undirected())]
print "total cliques: %d"%len(cliques_length)
print "max clique length: %d"%max(cliques_length)
print "average clique length: %d"%average(cliques_length)
cliques_dist=Counter(cliques_length).most_common()
title="Cliques distribution"
plt.bar([c[0]for c in cliques_dist ],[c[1]for c in cliques_dist ])
plt.grid(True,linestyle='-',color='0.75')
plt.ylabel('Counts')
plt.title(title,fontstyle='italic')
In [ ]:
In [176]:
# nx.average_shortest_path_length(G)
# G_components = nx.connected_component_subgraphs(G.to_undirected())
#for g in G_components:
# print(nx.average_shortest_path_length(g))
In [247]:
In [218]:
try:
from networkx import graphviz_layout
layout=nx.graphviz_layout
except ImportError:
print "PyGraphviz not found; drawing with spring layout; will be slow."
layout=nx.spring_layout
region=220 # for pylab 2x2 subplot layout
plt.subplots_adjust(left=0,right=1,bottom=0,top=0.95,wspace=0.01,hspace=0.01)
pvals=[0.003, 0.006, 0.008, 0.015]
for p in pvals:
# G=nx.binomial_graph(n,p)
pos=layout(G)
region+=1
plt.subplot(region)
plt.title("p = %6.3f"%(p))
nx.draw(G,pos,
with_labels=False,
node_size=10
)
# identify largest connected component
Gcc=nx.connected_component_subgraphs(G)
G0=Gcc[0]
nx.draw_networkx_edges(G0,pos,
with_labels=False,
edge_color='r',
width=6.0
)
# show other connected components
for Gi in Gcc[1:]:
if len(Gi)>1:
nx.draw_networkx_edges(Gi,pos,
with_labels=False,
edge_color='r',
alpha=0.3,
width=5.0
)
In [149]:
# from http://perso.crans.org/aynaud/communities/
partition = community.best_partition(G.to_undirected())
print "Partitions found: ", len(set(partition.values()))
# Modularity
modularity=community.modularity(partition,G.to_undirected())
print "Modularity: %f"%modularity
In [122]:
# Draw nodes colored by Louvain community (one gray level per community),
# then overlay the edges. NOTE(review): near-duplicate of cell In [29] --
# consider extracting a shared plotting function.
G_und = G.to_undirected()  # hoisted: the original rebuilt the projection 3 times
size = float(len(set(partition.values())))
pos = nx.spring_layout(G_und)
count = 0.
for com in set(partition.values()) :
    count = count + 1.
    list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == com]
    nx.draw_networkx_nodes(G_und, pos, list_nodes, node_size = 20, node_color = str(count / size))
nx.draw_networkx_edges(G_und, pos, alpha=0.5)
Out[122]:
In [119]:
# NOTE(review): numpy is imported mid-notebook and never used in this cell --
# move to the top-of-notebook import cell or drop it
import numpy as np
# number of levels in the Louvain dendrogram ("generate_dendogram" is the
# old, misspelled python-louvain API name)
ddg=community.generate_dendogram(G.to_undirected())
# bare last expression: displayed as the cell's Out[] value
len(ddg)
#for level in range(len(ddg) - 1) :
# print "partition at level", level, "is", community.partition_at_level(ddg, level)
Out[119]:
In [8]:
# Betweenness centrality
# bet_cen = nx.betweenness_centrality(G_mc)
# print "average_betweenness_centrality %f"%bet_cen
# Closeness centrality
# clo_cen = nx.closeness_centrality(G_mc)
# print "closeness_centrality %f"%clo_cen
# Eigenvector centrality
# eig_cen = nx.eigenvector_centrality(G_mc)
# print eig_cen
#print "Eigenvector centrality : %f"%eig_cen[0]